#download packages

require(ggplot2)
library(tidyverse)
library(dplyr)
require(car)
require(MASS)
require(boot)
library(lattice) # alternatively can use package ZIM for zero-inflated models
library(lmtest)
library(MASS)
library(tidyverse)
library(easystats)
library(ggeffects)
library(ggplot2)
library(scales)
library(gridExtra)
library(cowplot)
p <- read_csv("DOH.csv")
p
## # A tibble: 9,867 × 32
##    Post_oldid Real_ID date    COVID_Post RiskFactor_2_CTA SocialDisparities_3_…¹
##    <chr>        <dbl> <chr>        <dbl>            <dbl>                  <dbl>
##  1 1             2069 9/22/20          1               88                     88
##  2 2             2070 8/27/20          1               88                     88
##  3 3             2071 12/29/…          1               88                     88
##  4 4             2072 9/29/20          1               88                     88
##  5 5             2073 10/6/20          1               88                     88
##  6 6             2074 12/31/…          1               88                     88
##  7 7              403 4/2/20           1               88                     88
##  8 8             5425 12/1/20          1               88                     88
##  9 9             5842 12/20/…          1               88                     88
## 10 10            2075 3/5/20           1               88                     88
## # ℹ 9,857 more rows
## # ℹ abbreviated name: ¹​SocialDisparities_3_CTA
## # ℹ 26 more variables: Debunk_4_CTA <dbl>, UncertaintyReduction_5_CTA <dbl>,
## #   Testing_6_CTA <dbl>, Vaccine_7_CTA <dbl>, Mental_8_YesNo <dbl>,
## #   Emotion_9_Number <dbl>, Praise_10_Number <dbl>, Pop_11_Number <dbl>,
## #   Language_12_Number <dbl>, Action_13_CTA <dbl>, Positive_14a_Number <dbl>,
## #   Negative_14b_Number <dbl>, Individual_14c_Number <dbl>, …
# Install and load the openxlsx package
#install.packages("openxlsx")
#library(openxlsx)

# Assuming p is your dataframe
# Replace this with your actual dataframe
# For example, p <- read.csv('your_data.csv') or p <- data.frame(...)

# Write the dataframe to an Excel file
#write.xlsx(p, file = "DOH_Final.xlsx")

#Change variable names

p1 <- 
  p %>% 
  rename(State_recode = `State_ recode`, 
        White_percent = `White_\bpercent`, 
        Risk_Factor ='RiskFactor_2_CTA', 
        Social_Disparities = 'SocialDisparities_3_CTA', 
        Debunk_MI = 'Debunk_4_CTA',
        Uncertainty_Reduction = 'UncertaintyReduction_5_CTA',  
        Testing = 'Testing_6_CTA', 
        Vaccine = 'Vaccine_7_CTA', 
        Pre_Action = 'Action_13_CTA', 
        Positive_eff = 'Positive_14a_Number', 
        Negative_eff = 'Negative_14b_Number', 
        Individual_eff = 'Individual_14c_Number', 
        Collective_eff = 'Collective_14d_Number', 
        Prosperity_ranking = 'ProspertiyRanking', 
        Minority_percent = 'minority_percent'
  ) 

#Frequency with 88

# Plotting frequency distribution for each variable
for (col in names(p1)) {
  if (is.numeric(p1[[col]])) {
    plot <- ggplot(p1, aes_string(x = as.name(col))) +
      geom_bar() +
      labs(title = paste("Frequency Distribution of", col),
           x = col,
           y = "Frequency")
    print(plot)
    # Save or print the plot as needed
    # ggsave(paste("plot_", col, ".png", sep=""), plot = plot, device = "png")
  }
}
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Warning: Removed 422 rows containing non-finite values (`stat_count()`).

p4 <- 
  p1 %>% 
  mutate_all(~ ifelse(. %in% c('88', '888', "99", "999"), 0, .))
p4
## # A tibble: 9,867 × 32
##    Post_oldid Real_ID date   COVID_Post Risk_Factor Social_Disparities Debunk_MI
##    <chr>        <dbl> <chr>       <dbl>       <dbl>              <dbl>     <dbl>
##  1 1             2069 9/22/…          1           0                  0         0
##  2 2             2070 8/27/…          1           0                  0         0
##  3 3             2071 12/29…          1           0                  0         0
##  4 4             2072 9/29/…          1           0                  0         0
##  5 5             2073 10/6/…          1           0                  0         0
##  6 6             2074 12/31…          1           0                  0         0
##  7 7              403 4/2/20          1           0                  0         0
##  8 8             5425 12/1/…          1           0                  0         0
##  9 9             5842 12/20…          1           0                  0         0
## 10 10            2075 3/5/20          1           0                  0         0
## # ℹ 9,857 more rows
## # ℹ 25 more variables: Uncertainty_Reduction <dbl>, Testing <dbl>,
## #   Vaccine <dbl>, Mental_8_YesNo <dbl>, Emotion_9_Number <dbl>,
## #   Praise_10_Number <dbl>, Pop_11_Number <dbl>, Language_12_Number <dbl>,
## #   Pre_Action <dbl>, Positive_eff <dbl>, Negative_eff <dbl>,
## #   Individual_eff <dbl>, Collective_eff <dbl>, State_recode <dbl>,
## #   Gov_poli <dbl>, Trifecta <dbl>, White_percent <dbl>, …
table(p4$Uncertainty_Reduction)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
## 1349 4561 1733  941  473  306  199  102   68   24   61   22    3    4    4    5 
##   16   17   18   25   26 
##    5    1    4    1    1
table(p4$Testing)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
## 8741  644  279   81   32   12   17    4    2    4   28   12    1    3    1    3 
##   18   26 
##    2    1
table(p4$Vaccine)
## 
##    0    1    2    3    4    5    6    7    8    9   15 
## 9394  272  134   26   11   10   10    5    2    1    2

#frequency without 88

# Plotting frequency distribution for each variable
for (col in names(p4)) {
  if (is.numeric(p4[[col]])) {
    plot1 <- ggplot(p4, aes_string(x = as.name(col))) +
      geom_bar() +
      labs(title = paste("Frequency Distribution of", col),
           x = col,
           y = "Frequency")
    print(plot1)
    # Save or print the plot as needed
    # ggsave(paste("plot_", col, ".png", sep=""), plot = plot, device = "png")
  }
}

## Warning: Removed 422 rows containing non-finite values (`stat_count()`).

============ #Transform to bi-nominal only 1 and 0 for various variables

p3 <- 
  p1 %>% 
mutate(Risk_Factor = ifelse(Risk_Factor %in% c('88', '888'), 0, 1)) %>% 
mutate(Social_Disparities = ifelse(Social_Disparities %in% c('88', '888'), 0, 1)) %>% 
mutate(Debunk_MI = ifelse(Debunk_MI %in% c('88', '888'), 0, 1)) %>% 
mutate(Uncertainty_Reduction = ifelse(Uncertainty_Reduction %in% c('88', '888'), 0, 1))
p3
## # A tibble: 9,867 × 32
##    Post_oldid Real_ID date   COVID_Post Risk_Factor Social_Disparities Debunk_MI
##    <chr>        <dbl> <chr>       <dbl>       <dbl>              <dbl>     <dbl>
##  1 1             2069 9/22/…          1           0                  0         0
##  2 2             2070 8/27/…          1           0                  0         0
##  3 3             2071 12/29…          1           0                  0         0
##  4 4             2072 9/29/…          1           0                  0         0
##  5 5             2073 10/6/…          1           0                  0         0
##  6 6             2074 12/31…          1           0                  0         0
##  7 7              403 4/2/20          1           0                  0         0
##  8 8             5425 12/1/…          1           0                  0         0
##  9 9             5842 12/20…          1           0                  0         0
## 10 10            2075 3/5/20          1           0                  0         0
## # ℹ 9,857 more rows
## # ℹ 25 more variables: Uncertainty_Reduction <dbl>, Testing <dbl>,
## #   Vaccine <dbl>, Mental_8_YesNo <dbl>, Emotion_9_Number <dbl>,
## #   Praise_10_Number <dbl>, Pop_11_Number <dbl>, Language_12_Number <dbl>,
## #   Pre_Action <dbl>, Positive_eff <dbl>, Negative_eff <dbl>,
## #   Individual_eff <dbl>, Collective_eff <dbl>, State_recode <dbl>,
## #   Gov_poli <dbl>, Trifecta <dbl>, White_percent <dbl>, …
glimpse(p3)
## Rows: 9,867
## Columns: 32
## $ Post_oldid            <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10…
## $ Real_ID               <dbl> 2069, 2070, 2071, 2072, 2073, 2074, 403, 5425, 5…
## $ date                  <chr> "9/22/20", "8/27/20", "12/29/20", "9/29/20", "10…
## $ COVID_Post            <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ Risk_Factor           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ Social_Disparities    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Debunk_MI             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Uncertainty_Reduction <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, …
## $ Testing               <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 1, 88, 88, 88, 8…
## $ Vaccine               <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, …
## $ Mental_8_YesNo        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Emotion_9_Number      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Praise_10_Number      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ Pop_11_Number         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Language_12_Number    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, …
## $ Pre_Action            <dbl> 0, 0, 0, 0, 0, 0, 7, 0, 1, 1, 1, 1, 0, 1, 1, 3, …
## $ Positive_eff          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, …
## $ Negative_eff          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Individual_eff        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Collective_eff        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, …
## $ State_recode          <dbl> 5, 5, 5, 5, 5, 5, 2, 30, 39, 5, 5, 9, 6, 51, 51,…
## $ Gov_poli              <dbl> 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, …
## $ Trifecta              <dbl> 2, 2, 2, 2, 2, 2, 3, 2, 1, 2, 2, 1, 2, 2, 2, 3, …
## $ White_percent         <dbl> 36.5, 36.5, 36.5, 36.5, 36.5, 36.5, 60.2, 54.6, …
## $ Minority_percent      <dbl> 63.5, 63.5, 63.5, 63.5, 63.5, 63.5, 39.8, 45.4, …
## $ Poverty_percent       <dbl> 11.8, 11.8, 11.8, 11.8, 11.8, 11.8, 10.2, 9.1, 1…
## $ Prosperity_ranking    <dbl> 25, 25, 25, 25, 25, 25, 39, 12, 18, 25, 25, 31, …
## $ year                  <dbl> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, …
## $ month                 <dbl> 9, 8, 12, 9, 10, 12, 4, 12, 12, 3, 4, 4, 12, 4, …
## $ day                   <dbl> 22, 27, 29, 29, 6, 31, 2, 1, 20, 5, 22, 17, 22, …
## $ state                 <chr> "CA", "CA", "CA", "CA", "CA", "CA", "AK", "NJ", …
## $ cases                 <dbl> 2860, 5352, 34167, 3143, 2602, 32295, 3, 4606, 0…
table(p3$Risk_Factor)
## 
##    0    1 
## 8524 1343
table(p3$Social_Disparities)
## 
##    0    1 
## 9691  176
table(p3$Debunk_MI)
## 
##    0    1 
## 9432  435
table(p3$Uncertainty_Reduction)
## 
##    0    1 
##  478 9389
table(p3$Positive_eff)
## 
##    0    1    2    3    4    5    6    7    8 
## 5875 2563  949  289  124   35   20    7    5
table(p3$Negative_eff)
## 
##    0    1    2    3    5    6 
## 9728   99   27   10    2    1
table(p3$Collective_eff)
## 
##    0    1    2    3    4    5    6    7    8 
## 6635 2315  622  186   64   24   13    5    3
table(p3$Individual_eff)
## 
##    0    1    2    3    4    5    6    7 
## 8523 1078  171   35   49    7    2    2
# Check the column names in p3
colnames(p3)
##  [1] "Post_oldid"            "Real_ID"               "date"                 
##  [4] "COVID_Post"            "Risk_Factor"           "Social_Disparities"   
##  [7] "Debunk_MI"             "Uncertainty_Reduction" "Testing"              
## [10] "Vaccine"               "Mental_8_YesNo"        "Emotion_9_Number"     
## [13] "Praise_10_Number"      "Pop_11_Number"         "Language_12_Number"   
## [16] "Pre_Action"            "Positive_eff"          "Negative_eff"         
## [19] "Individual_eff"        "Collective_eff"        "State_recode"         
## [22] "Gov_poli"              "Trifecta"              "White_percent"        
## [25] "Minority_percent"      "Poverty_percent"       "Prosperity_ranking"   
## [28] "year"                  "month"                 "day"                  
## [31] "state"                 "cases"
# Check the data types in p3 for relevant columns
str(p3[c("Risk_Factor", "Social_Disparities", "Debunk_MI", "Uncertainty_Reduction")])
## tibble [9,867 × 4] (S3: tbl_df/tbl/data.frame)
##  $ Risk_Factor          : num [1:9867] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Social_Disparities   : num [1:9867] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Debunk_MI            : num [1:9867] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Uncertainty_Reduction: num [1:9867] 1 1 1 1 1 1 0 1 1 1 ...
# Modify the code to select the columns
p2 <- p3 %>%
  mutate(
    Risk_Factor = as.factor(Risk_Factor),
    Social_Disparities = as.factor(Social_Disparities),
    Debunk_MI = as.factor(Debunk_MI),
    Uncertainty_Reduction = as.factor(Uncertainty_Reduction)
  )
p2
## # A tibble: 9,867 × 32
##    Post_oldid Real_ID date   COVID_Post Risk_Factor Social_Disparities Debunk_MI
##    <chr>        <dbl> <chr>       <dbl> <fct>       <fct>              <fct>    
##  1 1             2069 9/22/…          1 0           0                  0        
##  2 2             2070 8/27/…          1 0           0                  0        
##  3 3             2071 12/29…          1 0           0                  0        
##  4 4             2072 9/29/…          1 0           0                  0        
##  5 5             2073 10/6/…          1 0           0                  0        
##  6 6             2074 12/31…          1 0           0                  0        
##  7 7              403 4/2/20          1 0           0                  0        
##  8 8             5425 12/1/…          1 0           0                  0        
##  9 9             5842 12/20…          1 0           0                  0        
## 10 10            2075 3/5/20          1 0           0                  0        
## # ℹ 9,857 more rows
## # ℹ 25 more variables: Uncertainty_Reduction <fct>, Testing <dbl>,
## #   Vaccine <dbl>, Mental_8_YesNo <dbl>, Emotion_9_Number <dbl>,
## #   Praise_10_Number <dbl>, Pop_11_Number <dbl>, Language_12_Number <dbl>,
## #   Pre_Action <dbl>, Positive_eff <dbl>, Negative_eff <dbl>,
## #   Individual_eff <dbl>, Collective_eff <dbl>, State_recode <dbl>,
## #   Gov_poli <dbl>, Trifecta <dbl>, White_percent <dbl>, …
# write a csv file of the p2
write.csv(p2, file = "p2.csv", row.names = FALSE)

===== NEW====

glimpse(p3) 
## Rows: 9,867
## Columns: 32
## $ Post_oldid            <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10…
## $ Real_ID               <dbl> 2069, 2070, 2071, 2072, 2073, 2074, 403, 5425, 5…
## $ date                  <chr> "9/22/20", "8/27/20", "12/29/20", "9/29/20", "10…
## $ COVID_Post            <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ Risk_Factor           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ Social_Disparities    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Debunk_MI             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Uncertainty_Reduction <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, …
## $ Testing               <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 1, 88, 88, 88, 8…
## $ Vaccine               <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, …
## $ Mental_8_YesNo        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Emotion_9_Number      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Praise_10_Number      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ Pop_11_Number         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Language_12_Number    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, …
## $ Pre_Action            <dbl> 0, 0, 0, 0, 0, 0, 7, 0, 1, 1, 1, 1, 0, 1, 1, 3, …
## $ Positive_eff          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, …
## $ Negative_eff          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Individual_eff        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Collective_eff        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, …
## $ State_recode          <dbl> 5, 5, 5, 5, 5, 5, 2, 30, 39, 5, 5, 9, 6, 51, 51,…
## $ Gov_poli              <dbl> 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, …
## $ Trifecta              <dbl> 2, 2, 2, 2, 2, 2, 3, 2, 1, 2, 2, 1, 2, 2, 2, 3, …
## $ White_percent         <dbl> 36.5, 36.5, 36.5, 36.5, 36.5, 36.5, 60.2, 54.6, …
## $ Minority_percent      <dbl> 63.5, 63.5, 63.5, 63.5, 63.5, 63.5, 39.8, 45.4, …
## $ Poverty_percent       <dbl> 11.8, 11.8, 11.8, 11.8, 11.8, 11.8, 10.2, 9.1, 1…
## $ Prosperity_ranking    <dbl> 25, 25, 25, 25, 25, 25, 39, 12, 18, 25, 25, 31, …
## $ year                  <dbl> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, …
## $ month                 <dbl> 9, 8, 12, 9, 10, 12, 4, 12, 12, 3, 4, 4, 12, 4, …
## $ day                   <dbl> 22, 27, 29, 29, 6, 31, 2, 1, 20, 5, 22, 17, 22, …
## $ state                 <chr> "CA", "CA", "CA", "CA", "CA", "CA", "AK", "NJ", …
## $ cases                 <dbl> 2860, 5352, 34167, 3143, 2602, 32295, 3, 4606, 0…
colnames(p2)
##  [1] "Post_oldid"            "Real_ID"               "date"                 
##  [4] "COVID_Post"            "Risk_Factor"           "Social_Disparities"   
##  [7] "Debunk_MI"             "Uncertainty_Reduction" "Testing"              
## [10] "Vaccine"               "Mental_8_YesNo"        "Emotion_9_Number"     
## [13] "Praise_10_Number"      "Pop_11_Number"         "Language_12_Number"   
## [16] "Pre_Action"            "Positive_eff"          "Negative_eff"         
## [19] "Individual_eff"        "Collective_eff"        "State_recode"         
## [22] "Gov_poli"              "Trifecta"              "White_percent"        
## [25] "Minority_percent"      "Poverty_percent"       "Prosperity_ranking"   
## [28] "year"                  "month"                 "day"                  
## [31] "state"                 "cases"
table(p3$Pre_Action)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
##  991 4691 1800  996  494  313  206  103   70   25   62   23    3    4    4    5 
##   16   17   18   25   26   88 
##    5    1    4    1    1   65
na_check_Risk_Factor <- any(is.na(p2$Risk_Factor))
na_check_Risk_Factor
## [1] FALSE
na_check_Social_Disparities <- any(is.na(p2$Social_Disparities))
na_check_Social_Disparities 
## [1] FALSE
na_check_Debunk_MI <- any(is.na(p2$Debunk_MI))
na_check_Debunk_MI
## [1] FALSE
na_check_Uncertainty_Reduction <- any(is.na(p2$Uncertainty_Reduction))
na_check_Uncertainty_Reduction 
## [1] FALSE

======== #RQ1: To what extent do the state health departments communicate about risks (i.e., uncertainty reduction, health risk factors, misinformation correction and disparities) related to COVID-19 on their Facebook accounts between January and December 2020?

table(p2$Risk_Factor)
## 
##    0    1 
## 8524 1343
table(p2$Social_Disparities)
## 
##    0    1 
## 9691  176
table(p2$Debunk_MI )
## 
##    0    1 
## 9432  435
table(p2$Uncertainty_Reduction)
## 
##    0    1 
##  478 9389

#RQ2:To what extent do the state health departments communicate outcome expectancies (2a)? What are the differences between the use of different types of expectancies (positive or negative and individual versus collective) on state health departments’ Facebook accounts between January and December 2020 (2b)?

# Assuming you have two vectors: p2$Positive_eff and p2$Negative_eff
# Perform a two-sample t-test
result_t_test1 <- t.test(p2$Positive_eff, p2$Negative_eff)

# Print the results
print(result_t_test1)
## 
##  Welch Two Sample t-test
## 
## data:  p2$Positive_eff and p2$Negative_eff
## t = 61.087, df = 10669, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.5894580 0.6285414
## sample estimates:
##  mean of x  mean of y 
## 0.62916793 0.02016824

#parameters

model_parameters(result_t_test1)
## Welch Two Sample t-test
## 
## Parameter1      |      Parameter2 | Mean_Parameter1 | Mean_Parameter2 | Difference |       95% CI | t(10669.10) |      p
## ------------------------------------------------------------------------------------------------------------------------
## p2$Positive_eff | p2$Negative_eff |            0.63 |            0.02 |       0.61 | [0.59, 0.63] |       61.09 | < .001
## 
## Alternative hypothesis: true difference in means is not equal to 0

z-test “Collective_eff - Individual_eff”

result_t_test2 <- t.test(p2$Collective_eff, p2$Individual_eff)
result_t_test2
## 
##  Welch Two Sample t-test
## 
## data:  p2$Collective_eff and p2$Individual_eff
## t = 28.889, df = 16935, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.2690552 0.3082226
## sample estimates:
## mean of x mean of y 
## 0.4692409 0.1806020

#parameters

model_parameters(result_t_test2)
## Welch Two Sample t-test
## 
## Parameter1        |        Parameter2 | Mean_Parameter1 | Mean_Parameter2 | Difference |       95% CI | t(16934.97) |      p
## ----------------------------------------------------------------------------------------------------------------------------
## p2$Collective_eff | p2$Individual_eff |            0.47 |            0.18 |       0.29 | [0.27, 0.31] |       28.89 | < .001
## 
## Alternative hypothesis: true difference in means is not equal to 0

#RQ3:To what extent do state health departments incorporate a set of risk persuasion techniques (i.e., uncertainty reduction, health risk factors, misinformation correction and disparities) in their call-to-action regarding COVID-19 between January and December 2020?

df <- p3 %>%
  mutate(
    Pre_Action = ifelse(Pre_Action == 0 | Pre_Action == 88, 0, 1), 
    New_Risk = ifelse(Risk_Factor + Social_Disparities + Debunk_MI + Uncertainty_Reduction == 0, 0, 1), 
    New_Eff = ifelse(Positive_eff  + Negative_eff + Individual_eff + Collective_eff == 0, 0, 1)
  ) %>%
  filter(Pre_Action == 1)
df
## # A tibble: 8,811 × 34
##    Post_oldid Real_ID date   COVID_Post Risk_Factor Social_Disparities Debunk_MI
##    <chr>        <dbl> <chr>       <dbl>       <dbl>              <dbl>     <dbl>
##  1 7              403 4/2/20          1           0                  0         0
##  2 9             5842 12/20…          1           0                  0         0
##  3 10            2075 3/5/20          1           0                  0         0
##  4 11            2076 4/22/…          1           0                  0         0
##  5 12            4003 4/17/…          1           0                  0         0
##  6 14            9189 4/30/…          1           0                  0         0
##  7 15            9190 4/30/…          1           0                  0         0
##  8 16             404 12/12…          1           1                  0         0
##  9 20            4004 5/15/…          1           0                  0         0
## 10 21            9191 10/12…          1           0                  0         0
## # ℹ 8,801 more rows
## # ℹ 27 more variables: Uncertainty_Reduction <dbl>, Testing <dbl>,
## #   Vaccine <dbl>, Mental_8_YesNo <dbl>, Emotion_9_Number <dbl>,
## #   Praise_10_Number <dbl>, Pop_11_Number <dbl>, Language_12_Number <dbl>,
## #   Pre_Action <dbl>, Positive_eff <dbl>, Negative_eff <dbl>,
## #   Individual_eff <dbl>, Collective_eff <dbl>, State_recode <dbl>,
## #   Gov_poli <dbl>, Trifecta <dbl>, White_percent <dbl>, …
glimpse(df)
## Rows: 8,811
## Columns: 34
## $ Post_oldid            <chr> "7", "9", "10", "11", "12", "14", "15", "16", "2…
## $ Real_ID               <dbl> 403, 5842, 2075, 2076, 4003, 9189, 9190, 404, 40…
## $ date                  <chr> "4/2/20", "12/20/20", "3/5/20", "4/22/20", "4/17…
## $ COVID_Post            <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ Risk_Factor           <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Social_Disparities    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Debunk_MI             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ Uncertainty_Reduction <dbl> 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, …
## $ Testing               <dbl> 88, 1, 88, 88, 88, 1, 1, 88, 88, 88, 88, 88, 88,…
## $ Vaccine               <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 1, 8…
## $ Mental_8_YesNo        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Emotion_9_Number      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Praise_10_Number      <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Pop_11_Number         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, …
## $ Language_12_Number    <dbl> 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, …
## $ Pre_Action            <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ Positive_eff          <dbl> 0, 0, 1, 1, 0, 0, 0, 2, 0, 1, 1, 0, 1, 1, 0, 1, …
## $ Negative_eff          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Individual_eff        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, …
## $ Collective_eff        <dbl> 0, 0, 1, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, …
## $ State_recode          <dbl> 2, 39, 5, 5, 9, 51, 51, 2, 9, 51, 51, 39, 51, 51…
## $ Gov_poli              <dbl> 1, 2, 2, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, …
## $ Trifecta              <dbl> 3, 1, 2, 2, 1, 2, 2, 3, 1, 2, 2, 1, 2, 2, 2, 2, …
## $ White_percent         <dbl> 60.2, 71.4, 36.5, 36.5, 53.2, 37.5, 37.5, 60.2, …
## $ Minority_percent      <dbl> 39.8, 28.6, 63.5, 63.5, 46.8, 62.5, 62.5, 39.8, …
## $ Poverty_percent       <dbl> 10.2, 11.6, 11.8, 11.8, 12.7, 14.1, 14.1, 10.2, …
## $ Prosperity_ranking    <dbl> 39, 18, 25, 25, 31, 8, 8, 39, 31, 8, 8, 18, 8, 8…
## $ year                  <dbl> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, …
## $ month                 <dbl> 4, 12, 3, 4, 4, 4, 4, 12, 5, 10, 10, 12, 8, 5, 1…
## $ day                   <dbl> 2, 20, 5, 22, 17, 30, 30, 12, 15, 12, 22, 11, 13…
## $ state                 <chr> "AK", "RI", "CA", "CA", "FL", "DC", "DC", "AK", …
## $ cases                 <dbl> 3, 0, 12, 1729, 1413, 217, 217, 520, 928, 38, 39…
## $ New_Risk              <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, …
## $ New_Eff               <dbl> 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, …
table(df$Pre_Action)
## 
##    1 
## 8811
table(df$New_Risk)
## 
##    0    1 
##  233 8578
table(df$New_Eff)
## 
##    0    1 
## 4898 3913
# Define the APA-style theme
theme_apa <- function() {
  theme_minimal() +
    theme(
      text = element_text(family = "Times New Roman", size = 10),
      plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
      axis.title = element_text(size = 12, face = "bold"),
      axis.text = element_text(size = 10),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank(),
      legend.position = "right",
      legend.text = element_text(size = 12), 
      axis.line = element_line(color = "black")
    )
}

Run Chi-Square for New_Risk

# Create a contingency table
contingency_New_Risk <- table(df$New_Risk)
# Perform the chi-square test
chi_square_New_Risk <- chisq.test(contingency_New_Risk)
# Print the results
chi_square_New_Risk
## 
##  Chi-squared test for given probabilities
## 
## data:  contingency_New_Risk
## X-squared = 7903.6, df = 1, p-value < 2.2e-16
model_parameters(chi_square_New_Risk)
## Chi-squared test for given probabilities
## 
## Chi2(1) |      p
## ----------------
## 7903.65 | < .001
# Create a data frame with the Chi-squared statistic and p-value
results <- data.frame(Test = "chi_square_New_Risk", Chi2 = chi_square_New_Risk$statistic, p = chi_square_New_Risk$p.value)

# Create a horizontal bar chart-like visualization
library(ggplot2)
ggplot(results, aes(x = Test, y = Chi2, fill = p < 0.001)) +
  geom_bar(stat = "identity", width = 0.5) +
  labs(title = "Chi-squared test for given probabilities",
       y = "Chi-squared statistic") +
  scale_fill_manual(values = c("TRUE" = "purple", "FALSE" = "blue")) +
  coord_flip() +
  theme_apa()

============

#RQ4: To what extent do state-level poverty, percentage of minority populations and government trifecta predict state health departments’ risk persuasion techniques (i.e., uncertainty reduction, health risk factors, misinformation correction and disparities) (4a), outcome expectancies (4b) and call-to-action (4c) on state health departments’ Facebook accounts between January and December 2020?

df1 <- p3 %>% 
   mutate(
    Pre_Action = ifelse(Pre_Action == 0 | Pre_Action == 88, 0, 1), 
    New_Risk = ifelse(Risk_Factor + Social_Disparities + Debunk_MI + Uncertainty_Reduction == 0, 0, 1), 
    New_Eff = ifelse(Positive_eff  + Negative_eff + Individual_eff + Collective_eff == 0, 0, 1)
  ) 
df1
## # A tibble: 9,867 × 34
##    Post_oldid Real_ID date   COVID_Post Risk_Factor Social_Disparities Debunk_MI
##    <chr>        <dbl> <chr>       <dbl>       <dbl>              <dbl>     <dbl>
##  1 1             2069 9/22/…          1           0                  0         0
##  2 2             2070 8/27/…          1           0                  0         0
##  3 3             2071 12/29…          1           0                  0         0
##  4 4             2072 9/29/…          1           0                  0         0
##  5 5             2073 10/6/…          1           0                  0         0
##  6 6             2074 12/31…          1           0                  0         0
##  7 7              403 4/2/20          1           0                  0         0
##  8 8             5425 12/1/…          1           0                  0         0
##  9 9             5842 12/20…          1           0                  0         0
## 10 10            2075 3/5/20          1           0                  0         0
## # ℹ 9,857 more rows
## # ℹ 27 more variables: Uncertainty_Reduction <dbl>, Testing <dbl>,
## #   Vaccine <dbl>, Mental_8_YesNo <dbl>, Emotion_9_Number <dbl>,
## #   Praise_10_Number <dbl>, Pop_11_Number <dbl>, Language_12_Number <dbl>,
## #   Pre_Action <dbl>, Positive_eff <dbl>, Negative_eff <dbl>,
## #   Individual_eff <dbl>, Collective_eff <dbl>, State_recode <dbl>,
## #   Gov_poli <dbl>, Trifecta <dbl>, White_percent <dbl>, …
df1 
## # A tibble: 9,867 × 34
##    Post_oldid Real_ID date   COVID_Post Risk_Factor Social_Disparities Debunk_MI
##    <chr>        <dbl> <chr>       <dbl>       <dbl>              <dbl>     <dbl>
##  1 1             2069 9/22/…          1           0                  0         0
##  2 2             2070 8/27/…          1           0                  0         0
##  3 3             2071 12/29…          1           0                  0         0
##  4 4             2072 9/29/…          1           0                  0         0
##  5 5             2073 10/6/…          1           0                  0         0
##  6 6             2074 12/31…          1           0                  0         0
##  7 7              403 4/2/20          1           0                  0         0
##  8 8             5425 12/1/…          1           0                  0         0
##  9 9             5842 12/20…          1           0                  0         0
## 10 10            2075 3/5/20          1           0                  0         0
## # ℹ 9,857 more rows
## # ℹ 27 more variables: Uncertainty_Reduction <dbl>, Testing <dbl>,
## #   Vaccine <dbl>, Mental_8_YesNo <dbl>, Emotion_9_Number <dbl>,
## #   Praise_10_Number <dbl>, Pop_11_Number <dbl>, Language_12_Number <dbl>,
## #   Pre_Action <dbl>, Positive_eff <dbl>, Negative_eff <dbl>,
## #   Individual_eff <dbl>, Collective_eff <dbl>, State_recode <dbl>,
## #   Gov_poli <dbl>, Trifecta <dbl>, White_percent <dbl>, …
glimpse(df1)
## Rows: 9,867
## Columns: 34
## $ Post_oldid            <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10…
## $ Real_ID               <dbl> 2069, 2070, 2071, 2072, 2073, 2074, 403, 5425, 5…
## $ date                  <chr> "9/22/20", "8/27/20", "12/29/20", "9/29/20", "10…
## $ COVID_Post            <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ Risk_Factor           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ Social_Disparities    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Debunk_MI             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Uncertainty_Reduction <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, …
## $ Testing               <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 1, 88, 88, 88, 8…
## $ Vaccine               <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, …
## $ Mental_8_YesNo        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Emotion_9_Number      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Praise_10_Number      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ Pop_11_Number         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Language_12_Number    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, …
## $ Pre_Action            <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, …
## $ Positive_eff          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, …
## $ Negative_eff          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Individual_eff        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Collective_eff        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, …
## $ State_recode          <dbl> 5, 5, 5, 5, 5, 5, 2, 30, 39, 5, 5, 9, 6, 51, 51,…
## $ Gov_poli              <dbl> 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, …
## $ Trifecta              <dbl> 2, 2, 2, 2, 2, 2, 3, 2, 1, 2, 2, 1, 2, 2, 2, 3, …
## $ White_percent         <dbl> 36.5, 36.5, 36.5, 36.5, 36.5, 36.5, 60.2, 54.6, …
## $ Minority_percent      <dbl> 63.5, 63.5, 63.5, 63.5, 63.5, 63.5, 39.8, 45.4, …
## $ Poverty_percent       <dbl> 11.8, 11.8, 11.8, 11.8, 11.8, 11.8, 10.2, 9.1, 1…
## $ Prosperity_ranking    <dbl> 25, 25, 25, 25, 25, 25, 39, 12, 18, 25, 25, 31, …
## $ year                  <dbl> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, …
## $ month                 <dbl> 9, 8, 12, 9, 10, 12, 4, 12, 12, 3, 4, 4, 12, 4, …
## $ day                   <dbl> 22, 27, 29, 29, 6, 31, 2, 1, 20, 5, 22, 17, 22, …
## $ state                 <chr> "CA", "CA", "CA", "CA", "CA", "CA", "AK", "NJ", …
## $ cases                 <dbl> 2860, 5352, 34167, 3143, 2602, 32295, 3, 4606, 0…
## $ New_Risk              <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, …
## $ New_Eff               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, …
table(df1$Pre_Action)
## 
##    0    1 
## 1056 8811
table(df1$New_Risk)
## 
##    0    1 
##  392 9475
table(df1$New_Eff)
## 
##    0    1 
## 5801 4066
df2 <- df1 %>% 
   #mutate_at(vars(Pre_Action, New_Risk, New_Eff), ~ifelse(. == 0, 0, 1)) %>  
   mutate(Trifecta = as.factor(Trifecta))
df2  
## # A tibble: 9,867 × 34
##    Post_oldid Real_ID date   COVID_Post Risk_Factor Social_Disparities Debunk_MI
##    <chr>        <dbl> <chr>       <dbl>       <dbl>              <dbl>     <dbl>
##  1 1             2069 9/22/…          1           0                  0         0
##  2 2             2070 8/27/…          1           0                  0         0
##  3 3             2071 12/29…          1           0                  0         0
##  4 4             2072 9/29/…          1           0                  0         0
##  5 5             2073 10/6/…          1           0                  0         0
##  6 6             2074 12/31…          1           0                  0         0
##  7 7              403 4/2/20          1           0                  0         0
##  8 8             5425 12/1/…          1           0                  0         0
##  9 9             5842 12/20…          1           0                  0         0
## 10 10            2075 3/5/20          1           0                  0         0
## # ℹ 9,857 more rows
## # ℹ 27 more variables: Uncertainty_Reduction <dbl>, Testing <dbl>,
## #   Vaccine <dbl>, Mental_8_YesNo <dbl>, Emotion_9_Number <dbl>,
## #   Praise_10_Number <dbl>, Pop_11_Number <dbl>, Language_12_Number <dbl>,
## #   Pre_Action <dbl>, Positive_eff <dbl>, Negative_eff <dbl>,
## #   Individual_eff <dbl>, Collective_eff <dbl>, State_recode <dbl>,
## #   Gov_poli <dbl>, Trifecta <fct>, White_percent <dbl>, …
glimpse(df2)
## Rows: 9,867
## Columns: 34
## $ Post_oldid            <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10…
## $ Real_ID               <dbl> 2069, 2070, 2071, 2072, 2073, 2074, 403, 5425, 5…
## $ date                  <chr> "9/22/20", "8/27/20", "12/29/20", "9/29/20", "10…
## $ COVID_Post            <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ Risk_Factor           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ Social_Disparities    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Debunk_MI             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Uncertainty_Reduction <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, …
## $ Testing               <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 1, 88, 88, 88, 8…
## $ Vaccine               <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, …
## $ Mental_8_YesNo        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Emotion_9_Number      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Praise_10_Number      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ Pop_11_Number         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Language_12_Number    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, …
## $ Pre_Action            <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, …
## $ Positive_eff          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, …
## $ Negative_eff          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Individual_eff        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Collective_eff        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, …
## $ State_recode          <dbl> 5, 5, 5, 5, 5, 5, 2, 30, 39, 5, 5, 9, 6, 51, 51,…
## $ Gov_poli              <dbl> 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, …
## $ Trifecta              <fct> 2, 2, 2, 2, 2, 2, 3, 2, 1, 2, 2, 1, 2, 2, 2, 3, …
## $ White_percent         <dbl> 36.5, 36.5, 36.5, 36.5, 36.5, 36.5, 60.2, 54.6, …
## $ Minority_percent      <dbl> 63.5, 63.5, 63.5, 63.5, 63.5, 63.5, 39.8, 45.4, …
## $ Poverty_percent       <dbl> 11.8, 11.8, 11.8, 11.8, 11.8, 11.8, 10.2, 9.1, 1…
## $ Prosperity_ranking    <dbl> 25, 25, 25, 25, 25, 25, 39, 12, 18, 25, 25, 31, …
## $ year                  <dbl> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, …
## $ month                 <dbl> 9, 8, 12, 9, 10, 12, 4, 12, 12, 3, 4, 4, 12, 4, …
## $ day                   <dbl> 22, 27, 29, 29, 6, 31, 2, 1, 20, 5, 22, 17, 22, …
## $ state                 <chr> "CA", "CA", "CA", "CA", "CA", "CA", "AK", "NJ", …
## $ cases                 <dbl> 2860, 5352, 34167, 3143, 2602, 32295, 3, 4606, 0…
## $ New_Risk              <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, …
## $ New_Eff               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, …
table(df2$Pre_Action)
## 
##    0    1 
## 1056 8811
table(df2$New_Risk)
## 
##    0    1 
##  392 9475
table(df2$New_Eff)
## 
##    0    1 
## 5801 4066

#CTA

library(lmtest)
model1 <- glm(Pre_Action ~ Trifecta + Minority_percent + Poverty_percent,
              family = binomial,
              data = df2)
summary(model1)
## 
## Call:
## glm(formula = Pre_Action ~ Trifecta + Minority_percent + Poverty_percent, 
##     family = binomial, data = df2)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       1.260297   0.173202   7.276 3.43e-13 ***
## Trifecta2        -0.356837   0.092199  -3.870 0.000109 ***
## Trifecta3         0.119049   0.102382   1.163 0.244915    
## Minority_percent  0.016147   0.002765   5.841 5.20e-09 ***
## Poverty_percent   0.032139   0.014402   2.232 0.025642 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6714.4  on 9866  degrees of freedom
## Residual deviance: 6651.7  on 9862  degrees of freedom
## AIC: 6661.7
## 
## Number of Fisher Scoring iterations: 5
model_parameters(model1, exponentiate = TRUE)
## Parameter        | Odds Ratio |       SE |       95% CI |     z |      p
## ------------------------------------------------------------------------
## (Intercept)      |       3.53 |     0.61 | [2.51, 4.95] |  7.28 | < .001
## Trifecta [2]     |       0.70 |     0.06 | [0.58, 0.84] | -3.87 | < .001
## Trifecta [3]     |       1.13 |     0.12 | [0.92, 1.38] |  1.16 | 0.245 
## Minority percent |       1.02 | 2.81e-03 | [1.01, 1.02] |  5.84 | < .001
## Poverty percent  |       1.03 |     0.01 | [1.00, 1.06] |  2.23 | 0.026
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.

Create individual ggpredict plots and assign them to variables

plot1 <- ggpredict(M2b1_a, “Trifecta”) |> plot() + theme_apa() + labs(title = “Predicted POE by GT”, x = “GT”, y = “POE”) + scale_x_discrete(labels = c(“No”, “Yes”))

library(ggeffects)
library(ggplot2)
library(cowplot)


# Create individual ggpredict plots and assign them to variables
plot1 <- ggpredict(model1, "Trifecta") |> plot() + theme_apa() +
  labs(title = "Predicted Call-to-action by Government Trifecta",
       x = "Government Trifecta",
       y = "Call-to-action") +
  scale_x_discrete(labels = c("No", "Yes"))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
plot2 <- ggpredict(model1, terms = "Minority_percent [all]") |> plot() + theme_apa() +
  labs(title = "Predicted CTA by Minority Percentage",
       x = "Minority Percentage %",
       y = "Call-to-action")

plot3 <- ggpredict(model1, terms = "Poverty_percent [all]") |> plot() + theme_apa() +
  labs(title = "Predicted CTA by Poverty Percentage",
       x = "Poverty Percentage %",
       y = "Call-to-action")

# Arrange the plots in a grid
combined_plot1 <- plot_grid(plot1, plot2, plot3, ncol = 1)
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
# Print the combined plot
print(combined_plot1)

# Save the combined boxplot as a PNG file
ggsave("New_SED_CTA.png", plot = combined_plot1, width = 8, height = 4, units = "in")

#Risk

model2<- glm(New_Risk ~ Trifecta + Minority_percent + Poverty_percent,
                 family = binomial,
                 data = df2)
summary(model2)
## 
## Call:
## glm(formula = New_Risk ~ Trifecta + Minority_percent + Poverty_percent, 
##     family = binomial, data = df2)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       4.535020   0.267777  16.936  < 2e-16 ***
## Trifecta2        -0.868950   0.153664  -5.655 1.56e-08 ***
## Trifecta3        -0.041518   0.188737  -0.220    0.826    
## Minority_percent  0.005354   0.004503   1.189    0.235    
## Poverty_percent  -0.089013   0.021706  -4.101 4.12e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 3297.2  on 9866  degrees of freedom
## Residual deviance: 3226.3  on 9862  degrees of freedom
## AIC: 3236.3
## 
## Number of Fisher Scoring iterations: 6
model_parameters(model2,  exponentiate = TRUE)
## Parameter        | Odds Ratio |       SE |          95% CI |     z |      p
## ---------------------------------------------------------------------------
## (Intercept)      |      93.23 |    24.96 | [55.25, 157.91] | 16.94 | < .001
## Trifecta [2]     |       0.42 |     0.06 | [ 0.31,   0.57] | -5.65 | < .001
## Trifecta [3]     |       0.96 |     0.18 | [ 0.67,   1.41] | -0.22 | 0.826 
## Minority percent |       1.01 | 4.53e-03 | [ 1.00,   1.01] |  1.19 | 0.235 
## Poverty percent  |       0.91 |     0.02 | [ 0.88,   0.95] | -4.10 | < .001
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.
# Create individual ggpredict plots and assign them to variables
plot1 <- ggpredict(model2, "Trifecta") |> plot() + theme_apa() +
  labs(title = "Predicted Risks by Government Trifecta",
       x = "Government Trifecta",
       y = "Risks") +
  scale_x_discrete(labels = c("No", "Yes"))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
plot2 <- ggpredict(model2, terms = "Minority_percent [all]") |> plot() + theme_apa() +
  labs(title = "Predicted Risks by Minority Percentage",
       x = "Minority Percentage %",
       y = "Risks")

plot3 <- ggpredict(model2, terms = "Poverty_percent [all]") |> plot() + theme_apa() +
  labs(title = "Predicted Risks by Poverty Percentage",
       x = "Poverty Percentage %",
       y = "Risks")


# Arrange the plots in a grid
combined_plot2 <- plot_grid(plot2, plot3, ncol = 1)

# Print the combined plot
print(combined_plot2)

# Save the combined boxplot as a PNG file
ggsave("New_SED_Risks.png", plot = combined_plot2, width = 8, height = 4, units = "in")

#Expectancy

model3<- glm(New_Eff ~ Trifecta + Minority_percent + Poverty_percent,
                 family = binomial,
                 data = df2)
summary(model3)
## 
## Call:
## glm(formula = New_Eff ~ Trifecta + Minority_percent + Poverty_percent, 
##     family = binomial, data = df2)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -0.277426   0.112347  -2.469   0.0135 *  
## Trifecta2        -0.646443   0.058829 -10.988  < 2e-16 ***
## Trifecta3        -0.321824   0.065790  -4.892 1.00e-06 ***
## Minority_percent  0.031020   0.001783  17.401  < 2e-16 ***
## Poverty_percent  -0.075326   0.009313  -8.088 6.04e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 13372  on 9866  degrees of freedom
## Residual deviance: 13045  on 9862  degrees of freedom
## AIC: 13055
## 
## Number of Fisher Scoring iterations: 4
model_parameters(model3,  exponentiate = TRUE)
## Parameter        | Odds Ratio |       SE |       95% CI |      z |      p
## -------------------------------------------------------------------------
## (Intercept)      |       0.76 |     0.09 | [0.61, 0.94] |  -2.47 | 0.014 
## Trifecta [2]     |       0.52 |     0.03 | [0.47, 0.59] | -10.99 | < .001
## Trifecta [3]     |       0.72 |     0.05 | [0.64, 0.82] |  -4.89 | < .001
## Minority percent |       1.03 | 1.84e-03 | [1.03, 1.04] |  17.40 | < .001
## Poverty percent  |       0.93 | 8.64e-03 | [0.91, 0.94] |  -8.09 | < .001
## 
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
##   computed using a Wald z-distribution approximation.
# Create individual ggpredict plots and assign them to variables
plot1 <- ggpredict(model3, "Trifecta") |> plot() + theme_apa() +
  labs(title = "Predicted Outcome Expectancy by Government Trifecta",
       x = "Government Trifecta",
       y = "Outcome Expectancy") +
  scale_x_discrete(labels = c("No", "Yes"))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
plot2 <- ggpredict(model3, terms = "Minority_percent [all]") |> plot() + theme_apa() +
  labs(title = "Predicted Outcome Expectancy by 
       Minority Percentage",
       x = "Minority Percentage %",
       y = "Outcome Expectancy")

plot3 <- ggpredict(model3, terms = "Poverty_percent [all]") |> plot() + theme_apa() +
  labs(title = "Predicted Outcome Expectancy by 
       Poverty Percentage",
       x = "Poverty Percentage %",
       y = "Outcome Expectancy")


# Arrange the plots in a grid
combined_plot3 <- plot_grid(plot1, plot2, plot3, ncol = 1)

# Print the combined plot
print(combined_plot3)

# Save the combined boxplot as a PNG file
ggsave("New_SED_Expectancy.png", plot = combined_plot3, width = 8, height = 4, units = "in")

===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE

modela <- glm(Pre_Action ~ cases, family = binomial, data = df2) summary(modela)

modelb <- glm(New_Risk ~ cases, family = binomial, data = df2) summary(modelb)

modelc <- glm(New_Eff ~ cases, family = binomial, data = df2) summary(modelc)